# About the EH19 survey data set.
# NOTE: the workspace is intentionally not wiped here. `rm(list = ls())` was
# removed because it silently deletes the caller's objects without producing a
# truly clean session (attached packages and options survive); restart R to
# get a clean run instead.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages("survey")
#install.packages("srvyr")
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(srvyr)
## 
## Attaching package: 'srvyr'
## The following object is masked from 'package:stats':
## 
##     filter
# Data sets: download the EH19 .RData file from GitHub and load its objects
# into the workspace (presumably this is where `eh19p`, used below, comes from).
# The connection is kept in a variable so it can be closed explicitly once
# loading is done instead of being left for the garbage collector.
eh19_con <- url("https://github.com/AlvaroLimber/R_aru/raw/master/data/eh19.RData")
load(eh19_con)
close(eh19_con)

Diseños muestrales en R (estimación)

Existen dos librerías para trabajar con diseños muestrales en R: survey y srvyr.

No es lo mismo que usar ponderadores en Stata con [w=]; lo más cercano en Stata es el prefijo svy.

Estimador de Horvitz-Thompson.

Parámetro del Total,

\[t_y=\sum_U{y_k}\]

El estimador,

\[\hat{t}_y=\sum_s \frac{y_k}{\pi_k}=\sum_s y_k\cdot\frac{1}{\pi_k}=\sum_s y_k\,w_k,\qquad w_k=\frac{1}{\pi_k}\]

En R…

# survey package: declare the complex sampling design on `eh19p`.
#   ids     = ~upm      cluster (PSU) identifier
#   strata  = ~estrato  stratum identifier
#   weights = ~factor   sampling weights
sd1 <- svydesign(
  ids = ~upm,
  strata = ~estrato,
  weights = ~factor,
  data = eh19p
)

# Design-weighted histogram of `aestudio` ...
svyhist(~aestudio, design = sd1)

# ... next to the unweighted histogram of the raw sample, for comparison.
hist(eh19p$aestudio)

# Design-based mean and standard error, dropping missing values.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
svymean(~aestudio, design = sd1, na.rm = TRUE)
##            mean     SE
## aestudio 8.1501 0.0722
# Mean of `aestudio` for every depto x area domain, with design effects.
# `TRUE` is spelled out because `T` can be reassigned.
t1 <- svyby(
  ~aestudio,
  ~depto + area,
  design = sd1,
  FUN = svymean,
  na.rm = TRUE,
  deff = TRUE
)
# Count the domains whose coefficient of variation exceeds 10%.
table(cv(t1) > 0.10)
## 
## FALSE 
##    18
# Coefficient of variation of each domain estimate stored in t1.
cv(t1)
## Chuquisaca.Urbana     La Paz.Urbana Cochabamba.Urbana      Oruro.Urbana 
##        0.04112228        0.01359558        0.01917086        0.03222194 
##     Potosí.Urbana     Tarija.Urbana Santa Cruz.Urbana       Beni.Urbana 
##        0.04172358        0.02540172        0.01862134        0.03162047 
##      Pando.Urbana  Chuquisaca.Rural      La Paz.Rural  Cochabamba.Rural 
##        0.03068992        0.07204062        0.03434233        0.03935627 
##       Oruro.Rural      Potosí.Rural      Tarija.Rural  Santa Cruz.Rural 
##        0.03638799        0.06865782        0.03523620        0.04566724 
##        Beni.Rural       Pando.Rural 
##        0.05773332        0.03930840
# Confidence interval (2.5 % / 97.5 % bounds) for each domain mean in t1.
confint(t1)
##                      2.5 %    97.5 %
## Chuquisaca.Urbana 8.376470  9.845096
## La Paz.Urbana     9.568880 10.092802
## Cochabamba.Urbana 9.050918  9.757635
## Oruro.Urbana      9.146942 10.380153
## Potosí.Urbana     7.514075  8.852477
## Tarija.Urbana     8.377137  9.254977
## Santa Cruz.Urbana 8.646720  9.301789
## Beni.Urbana       7.669275  8.682688
## Pando.Urbana      8.362137  9.432504
## Chuquisaca.Rural  4.258291  5.658515
## La Paz.Rural      6.043890  6.916233
## Cochabamba.Rural  4.815068  5.619996
## Oruro.Rural       5.861075  6.761292
## Potosí.Rural      3.964398  5.197253
## Tarija.Rural      5.244833  6.023009
## Santa Cruz.Rural  5.044238  6.035987
## Beni.Rural        5.473158  6.869832
## Pando.Rural       5.940493  6.932248
# Overview of the design object: design type, number of clusters,
# inclusion probabilities, stratum sizes and the available variables.
summary(sd1)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (1047) clusters.
## svydesign(ids = ~upm, strata = ~estrato, weights = ~factor, data = eh19p)
## Probabilities:
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0004584 0.0029599 0.0042334 0.0051684 0.0060003 0.0418970 
## Stratum Sizes: 
##              11   12    13   14   21   22   23   24
## obs        4080 8769 10408 7491 2499 2554 2494 1310
## design.PSU   93  218   270  213   65   69   79   40
## actual.PSU   93  218   270  213   65   69   79   40
## Data variables:
##   [1] "folio"          "depto"          "area"           "nro"           
##   [5] "s02a_02"        "s02a_03"        "s02a_04a"       "s02a_04b"      
##   [9] "s02a_04c"       "s02a_05"        "s02a_06a"       "s02a_06b"      
##  [13] "s02a_06c"       "s02a_06d"       "s02a_06e"       "s02a_06_b"     
##  [17] "s02a_07_1"      "s02a_07_2"      "s02a_07_3"      "s02a_08"       
##  [21] "s02a_10"        "s03a_01a"       "s03a_01b"       "s03a_01c"      
##  [25] "s03a_01d"       "s03a_01d2_cod"  "s03a_01e"       "s03a_02"       
##  [29] "s03a_02e"       "s03a_03"        "s03a_03a"       "s03a_04"       
##  [33] "s03a_04npioc"   "s04a_01a"       "s04a_01b"       "s04a_01e"      
##  [37] "s04a_02a"       "s04a_02b"       "s04a_02e"       "s04a_03a"      
##  [41] "s04a_03b"       "s04a_03c"       "s04a_03d"       "s04a_03e"      
##  [45] "s04a_03f"       "s04a_03g"       "s04a_04a"       "s04a_04b"      
##  [49] "s04a_04e"       "S04A_0"         "S04A_1"         "S04A_2"        
##  [53] "s04a_05a"       "s04a_05b"       "s04a_05c"       "s04a_05d"      
##  [57] "s04a_05e"       "s04a_06a"       "s04a_07a"       "s04a_07a_e"    
##  [61] "s04a_06b"       "s04a_07b"       "s04a_07b_e"     "s04a_06c"      
##  [65] "s04a_07c"       "s04a_07c_e"     "s04a_06d"       "s04a_07d"      
##  [69] "s04a_07d_e"     "s04a_06e"       "s04a_07e"       "s04a_07e_e"    
##  [73] "s04a_06f"       "s04a_07f"       "s04a_07f_e"     "s04a_06g"      
##  [77] "s04a_07g"       "s04a_07g_e"     "s04a_08"        "s04a_08a1"     
##  [81] "s04a_08a2"      "s04a_08b"       "s04a_09"        "s04a_09a"      
##  [85] "s04b_11a"       "s04b_11b"       "s04b_12"        "s04b_13"       
##  [89] "s04b_14a"       "s04b_14b"       "s04b_15"        "s04b_15e"      
##  [93] "S04B_9"         "S04B_A"         "S04B_B"         "s04b_16"       
##  [97] "s04b_16e"       "S04B_6"         "S04B_7"         "S04B_8"        
## [101] "s04b_17"        "s04b_17e"       "S04B_3"         "S04B_4"        
## [105] "S04B_5"         "s04b_18"        "s04b_18e"       "S04B_0"        
## [109] "S04B_1"         "S04B_2"         "s04b_19"        "s04b_20a1"     
## [113] "s04b_20a2"      "s04b_20b"       "s04b_21a"       "s04b_21b"      
## [117] "s04b_21b2"      "s04c_22"        "s04c_23"        "s04d_24"       
## [121] "s04d_25"        "s04d_26"        "s04d_27a"       "s04d_27b"      
## [125] "s04e_28a"       "s04e_28b"       "s04e_29a"       "s04e_29b"      
## [129] "s04e_30a"       "s04e_30b"       "s04e_30c_cod"   "s04e_31a"      
## [133] "s04e_31b"       "s04e_31c"       "s04e_31d"       "s04e_31e"      
## [137] "s04e_31f"       "s04e_31_e"      "s04e_32a"       "s04e_32b"      
## [141] "s04e_33a"       "s04e_33b"       "s04_e_34a"      "s04f_34"       
## [145] "s04f_35a"       "s04f_35b"       "s04f_35c"       "s04f_35e"      
## [149] "s05a_01"        "s05a_01a"       "s05a_02a"       "s05a_02c"      
## [153] "s05a_03a"       "s05a_03c"       "s05a_04"        "s05a_05"       
## [157] "s05a_05_e"      "s05a_06a"       "s05a_06c"       "s05a_07a"      
## [161] "s05a_07b"       "s05a_08"        "s05a_09"        "s05b_10"       
## [165] "s05b_11"        "s05b_11_e"      "s05b_11a"       "s05c_13a"      
## [169] "s05c_13b"       "s05c_13c"       "s05c_13d"       "s05c_13e"      
## [173] "s05c_13f"       "s05c_13g"       "s05c_13h"       "s05c_13_e"     
## [177] "s05c_14a"       "s05c_14b"       "s05c_15a"       "s05c_15b"      
## [181] "s05d_17"        "s05d_18"        "s05d_19a"       "s05d_19b"      
## [185] "s05d_20a"       "s05d_20b"       "s05d_21a"       "s05d_21b"      
## [189] "s05d_21e"       "s05d_22a"       "s05d_22b"       "s05d_22c"      
## [193] "s05d_22d"       "s05d_22e"       "s05d_22f"       "s05d_22g"      
## [197] "s05d_22h"       "s05d_22i"       "s05d_22j"       "s05d_22k"      
## [201] "s05d_22l"       "s05d_22_e"      "s06a_01"        "s06a_02"       
## [205] "s06a_03"        "s06a_04"        "s06a_05"        "s06a_06aa"     
## [209] "s06a_06ab"      "s06a_06ac"      "s06a_06e"       "s06a_07"       
## [213] "s06a_08a"       "s06a_08b"       "s06a_09"        "s06a_09e"      
## [217] "s06a_10"        "s06a_10e"       "s06b_11a"       "s06b_11a_cod"  
## [221] "s06b_11b"       "s06b_12a"       "s06b_12a_cod"   "s06b_12b"      
## [225] "s06b_13"        "s06b_13a"       "s06b_13b"       "s06b_13c"      
## [229] "s06b_14"        "s06b_15aa"      "s06b_15ab"      "s06b_15ba"     
## [233] "s06b_15bb"      "s06b_15ca"      "s06b_15cb"      "s06b_15da"     
## [237] "s06b_15db"      "s06b_17"        "s06b_18"        "s06b_19a"      
## [241] "s06b_19b"       "s06b_20"        "s06b_20e"       "s06b_21a"      
## [245] "s06b_21b"       "s06b_22"        "s06b_23aa"      "s06b_23ab"     
## [249] "s06c_25a"       "s06c_25b"       "s06c_26a"       "s06c_26b"      
## [253] "s06c_27aa"      "s06c_27ab"      "s06c_27ba"      "s06c_27bb"     
## [257] "s06c_28a"       "s06c_28a1"      "s06c_28b"       "s06c_29a"      
## [261] "s06c_29b"       "s06c_30a"       "s06c_30a1"      "s06c_30a2"     
## [265] "s06c_30b"       "s06c_30b1"      "s06c_30b2"      "s06c_30c"      
## [269] "s06c_30c1"      "s06c_30c2"      "s06c_30d"       "s06c_30d1"     
## [273] "s06c_30d2"      "s06c_30e"       "s06c_30e1"      "s06c_30e2"     
## [277] "s06d_31a"       "s06d_31b"       "s06d_32aa"      "s06d_32ab"     
## [281] "s06d_32ba"      "s06d_32bb"      "s06d_32ca"      "s06d_32cb"     
## [285] "s06d_32da"      "s06d_32db"      "s06d_32ea"      "s06d_32eb"     
## [289] "s06d_32fa"      "s06d_32fb"      "s06d_32ga"      "s06d_32gb"     
## [293] "s06d_32ha"      "s06d_32hb"      "s06d_33a"       "s06d_33b"      
## [297] "s06d_34"        "s06e_35a"       "s06e_35a_cod"   "s06e_35b"      
## [301] "s06e_36"        "s06e_37"        "s06e_38a"       "s06e_38b"      
## [305] "s06e_39"        "s06e_40"        "s06e_40b"       "s06f_42a"      
## [309] "s06f_42b"       "s06f_43a"       "s06f_43a1"      "s06f_43b"      
## [313] "s06f_43b1"      "s06f_43c"       "s06f_43c1"      "s06f_44a"      
## [317] "s06f_44b"       "s06f_45aa"      "s06f_45ab"      "s06f_45ba"     
## [321] "s06f_45bb"      "s06f_45ca"      "s06f_45cb"      "s06f_45da"     
## [325] "s06f_45db"      "s06f_45ea"      "s06f_45eb"      "s06f_45fa"     
## [329] "s06f_45fb"      "s06f_45ga"      "s06f_45gb"      "s06f_45ha"     
## [333] "s06f_45hb"      "s06f_46a"       "s06f_46b"       "s06g_47"       
## [337] "s06g_48"        "s06g_49"        "s06g_49e"       "s06g_50"       
## [341] "s06g_50e"       "s06g_51"        "s06g_51e"       "s06g_52"       
## [345] "s06g_53"        "s06g_54"        "s06g_55"        "s07a_01a"      
## [349] "s07a_01b"       "s07a_01c"       "s07a_01d"       "s07a_01e"      
## [353] "s07a_01e0"      "s07a_01e1"      "s07a_01e1e"     "s07a_01e2"     
## [357] "s07a_01e2e"     "s07a_02a"       "s07a_02b"       "s07a_02c"      
## [361] "s07a_02ce"      "s07a_03a"       "s07a_03b"       "s07a_03c"      
## [365] "s07a_04a"       "s07a_04b"       "s07a_04c"       "s07a_04d"      
## [369] "s07b_05aa"      "s07b_05ab"      "s07b_05ba"      "s07b_05bb"     
## [373] "s07b_05ca"      "s07b_05cb"      "s07b_05da"      "s07b_05db"     
## [377] "s07b_05de"      "s07b_05ea"      "s07b_05eb"      "s07b_05ee"     
## [381] "s07c_06"        "s07c_07"        "s07c_08a"       "s07c_08b"      
## [385] "s07c_08e"       "s07c_09"        "s07c_09e"       "s07c_10"       
## [389] "s08a_01"        "s08a_03a"       "s08a_03b"       "s08a_03c"      
## [393] "s08a_03e"       "s08a_04"        "s08a_06"        "upm"           
## [397] "estrato"        "factor"         "tipohogar"      "cobersalud"    
## [401] "hnv_ult_a"      "quienatenparto" "dondeatenparto" "niv_ed"        
## [405] "niv_ed_g"       "cmasi"          "educ_prev"      "aestudio"      
## [409] "cob_op"         "caeb_op"        "pet"            "ocupado"       
## [413] "cesante"        "aspirante"      "desocupado"     "pea"           
## [417] "temporal"       "permanente"     "pei"            "condact"       
## [421] "phrs"           "shrs"           "tothrs"         "yprilab"       
## [425] "yseclab"        "ylab"           "ynolab"         "yper"          
## [429] "yhog"           "yhogpc"         "z"              "zext"          
## [433] "p0"             "p1"             "p2"             "pext0"         
## [437] "pext1"          "pext2"
# Inspect the distribution of the sampling weights (`factor`): histogram ...
hist(eh19p$factor)

# ... followed by min, quartiles, mean and max.
summary(eh19p$factor)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   23.87  166.66  236.21  291.21  337.84 2181.61
# Tail percentiles (5%, 10%, 90%, 95%) of the sampling weights.
quantile(eh19p$factor, probs = c(0.05, 0.10, 0.90, 0.95))
##        5%       10%       90%       95% 
##  78.91777 110.66443 514.98672 663.16821
# srvyr: the same sampling design through the dplyr-style interface.
# sd2 converts the existing survey design object sd1.
sd2 <- as_survey_design(sd1)
# sd3 declares the design directly from the data frame.
sd3 <- eh19p %>%
  as_survey_design(ids = upm, strata = estrato, weights = factor)

# Overall design-based mean of `aestudio` using the design converted from sd1.
# `TRUE` is spelled out because `T` can be reassigned.
sd2 %>%
  summarise(m_aes = survey_mean(aestudio, na.rm = TRUE))
## # A tibble: 1 x 2
##   m_aes m_aes_se
##   <dbl>    <dbl>
## 1  8.15   0.0722
# Same estimate using the design declared directly with srvyr.
# `TRUE` is spelled out because `T` can be reassigned.
sd3 %>%
  summarise(m_aes = survey_mean(aestudio, na.rm = TRUE))
## # A tibble: 1 x 2
##   m_aes m_aes_se
##   <dbl>    <dbl>
## 1  8.15   0.0722
# Mean of `aestudio` restricted to rows with s02a_03 >= 15
# (s02a_03 is presumably age in years -- confirm against the questionnaire).
# `TRUE` is spelled out because `T` can be reassigned.
sd3 %>%
  filter(s02a_03 >= 15) %>%
  summarise(m_aes = survey_mean(aestudio, na.rm = TRUE))
## # A tibble: 1 x 2
##   m_aes m_aes_se
##   <dbl>    <dbl>
## 1  9.92   0.0800
# Mean of `aestudio` by depto and area for rows with s02a_03 >= 15, reporting
# the confidence interval, coefficient of variation, standard error and
# design effect of each estimate.
# `TRUE` is spelled out because `T` can be reassigned.
t2 <- sd3 %>%
  filter(s02a_03 >= 15) %>%
  group_by(depto, area) %>%
  summarise(
    m_aes = survey_mean(
      aestudio,
      na.rm = TRUE,
      deff = TRUE,
      vartype = c("ci", "cv", "se")
    )
  )
## Warning: The `add` argument of `group_by()` is deprecated as of dplyr 1.0.0.
## Please use the `.add` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# knitr supplies kable(), used here to render t2 as a formatted table.
library(knitr)
kable(t2)
depto area m_aes m_aes_low m_aes_upp m_aes_cv m_aes_se m_aes_deff
Chuquisaca Urbana 10.895703 10.148803 11.642604 0.0349344 0.3806348 7.162531
Chuquisaca Rural 5.693202 4.608445 6.777959 0.0971005 0.5528128 11.249735
La Paz Urbana 11.595812 11.346048 11.845576 0.0109768 0.1272846 5.500503
La Paz Rural 7.761397 7.192433 8.330360 0.0373585 0.2899545 2.984142
Cochabamba Urbana 11.218832 10.845887 11.591777 0.0169412 0.1900601 7.237421
Cochabamba Rural 6.114481 5.454174 6.774787 0.0550341 0.3365048 4.769871
Oruro Urbana 11.637908 11.009817 12.265998 0.0275038 0.3200868 6.434192
Oruro Rural 7.429932 6.775212 8.084653 0.0449073 0.3336581 3.888534
Potosí Urbana 10.484944 9.787499 11.182389 0.0338992 0.3554312 4.338572
Potosí Rural 5.216560 4.368462 6.064659 0.0828529 0.4322073 6.059638
Tarija Urbana 10.821501 10.317403 11.325600 0.0237396 0.2568983 4.577131
Tarija Rural 6.882555 6.059210 7.705900 0.0609646 0.4195923 5.004415
Santa Cruz Urbana 11.053740 10.740448 11.367032 0.0144440 0.1596597 5.091124
Santa Cruz Rural 6.695770 6.124629 7.266911 0.0434699 0.2910645 2.997842
Beni Urbana 10.599824 10.070925 11.128722 0.0254284 0.2695368 4.254653
Beni Rural 8.195720 7.419166 8.972275 0.0482870 0.3957469 3.680029
Pando Urbana 11.428409 10.934781 11.922036 0.0220120 0.2515621 2.502778
Pando Rural 8.783820 8.247287 9.320353 0.0311286 0.2734276 2.258157

Librería ggplot2 (en esta sección se cargan ggplot2 y plotly)

## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Unweighted histogram of `ylab` with the default binning.
ggplot(data = eh19p, mapping = aes(x = ylab)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 23816 rows containing non-finite values (stat_bin).

# Unweighted box plot of `ylab`.
ggplot(data = eh19p, mapping = aes(x = ylab)) +
  geom_boxplot()
## Warning: Removed 23816 rows containing non-finite values (stat_boxplot).

# Unweighted bar chart: raw sample counts per value of `aestudio`.
ggplot(data = eh19p, mapping = aes(x = aestudio)) +
  geom_bar()
## Warning: Removed 2903 rows containing non-finite values (stat_count).

# Weighted bar chart: bar heights are the sum of the sampling weights per
# value of `aestudio`. geom_bar() reads the `weight` aesthetic; the previous
# `weights = factor` is not a ggplot2 aesthetic and was silently ignored, so
# the chart showed raw sample counts.
ggplot(eh19p, aes(aestudio, weight = factor)) +
  geom_bar()
## Warning: Removed 2903 rows containing non-finite values (stat_count).

# Weighted bar chart of `aestudio` restricted to rows with s02a_03 >= 15.
# geom_bar() reads the `weight` aesthetic; the previous `weights = factor` is
# not a ggplot2 aesthetic and was silently ignored, so the chart showed raw
# sample counts.
ggplot(
  eh19p %>% filter(s02a_03 >= 15),
  aes(aestudio, weight = factor)
) +
  geom_bar()
## Warning: Removed 30 rows containing non-finite values (stat_count).

# Same weighted bar chart (rows with s02a_03 >= 15), stored so it can be
# converted to an interactive plotly widget.
# geom_bar() reads the `weight` aesthetic; the previous `weights = factor` is
# not a ggplot2 aesthetic and was silently ignored, so the chart showed raw
# sample counts.
g1 <- ggplot(
  eh19p %>% filter(s02a_03 >= 15),
  aes(aestudio, weight = factor)
) +
  geom_bar()

# Interactive version of g1.
ggplotly(g1)
## Warning: Removed 30 rows containing non-finite values (stat_count).